/* surveybias.ado --- 
 * 
 * Filename: surveybias.ado
 * Description: 
*! Author: Kai Arzheimer & Jocelyn Evans
 * Maintainer: 
 * Created: Sat Apr 30 22:53:55 2011 (+0200)
*! version 0.64 January 11, 2013 @ 12:47:25
 * Last-Updated: Fri Jan 11 12:47:25 2013 (+0100)
 *           By: Kai Arzheimer
 *     Update #: 615
 * URL: 
 * Keywords: 
 * Compatibility: 
 * 
 */

/* Commentary: 
 * 
 * 
 * 
 */

/* Change log:
 * 
 * 
 */

/* Code: */


*! Calculate A, A', B and Chi-2

program define surveybias, eclass byable(recall)
version 11
	if !replay() {
		syntax varname(numeric)  [if] [in] [fweight pweight iweight], POPvalues(numlist min=2 max=12 ) [VERBose] [Level(cilevel)]
		marksample touse
		local commandline   "surveybias `*'"
* Take care of weights
		if "`weight'" != "" {
			local weighting "[`weight' `exp']"
			}
* unique values + integer values
		qui inspect `varlist' if `touse'
		local nunique=r(N_unique)
		tempname intvalues
		scalar `intvalues' = r(N_posint) + r(N_negint)
* Min/max
		qui summarize `varlist' if `touse'
		local mincode=`r(min)' 
		local maxcode=`r(max)'

* Test for problems with variable
	
		if `nunique' <2 {
			display as error "There is no variation in `varlist'"
			error 409
			* No variation in variable
		}
		else if `nunique' >10 {
			display as error "`varlist' has too many (`nunique') unique values"
			error 149
		}
		else if `r(N)' > `intvalues' {
			display as error "`varlist' has non-integer values"
			exit 498
			}
		else if `nunique' > (`maxcode' - `mincode' +1) {
			display "Warning: category codes non-consecutive"
			}

* Test for problems with numlist
		local listnum : word count `popvalues'
		if `listnum' != `nunique' {
			display as error "Number of categories in  sample (`nunique') and population (`listnum') do not match"
			exit 498
			}

* Test for non-positive values in numlist
		foreach value of local popvalues  {
			if `value'  <=0 {
				display as error "All proportions must be strictly positive"
				error 125
				}
			}
		
		if "`verbose'" != "" {
			dis as txt "Information on `varlist'"	
			codebook `varlist'	
			dis as txt "Please make sure that `varlist' is correctly labelled, and that"
			dis as txt "the sequence of population values matches `varlist' "
			dis _n
			}
* Normalise information on population to proportions

* Calculate sum
		local popvalsum = subinstr("`popvalues'"," "," + ",.)
* Force evaluation of sum	
		local popvalsum = `popvalsum'
* Normalise
		local normpopvalues = ""
		foreach v of local popvalues {
			local normedval =  `v' / `popvalsum'
			local normpopvalues `normpopvalues' `normedval'
			}


* Run empty MLN
		qui mlogit `varlist' `weighting' if `touse', base(1)
* Save number of complete cases
		local cases = e(N)
	
* Categories need not be consecutive, so we cannot simply loop from 1 to n but need an index
	
		qui levelsof `varlist',local(surveycats)
	
* Begin building terms for nlcom. Start with common denominator. Depends on number of categories
* nlcom has convergence problems. Try mulitplying everything with a constant to ease convergence
		local multiplier = 100
		local cat : word 1 of `surveycats'
		local denomterm exp([`cat']_cons)
		forvalues index = 2/`listnum' {
			local cat : word `index' of `surveycats'
			local denomterm `denomterm' + exp([`cat']_cons)
			}
		local bterm = "0"
		local bwterm = "0"		
* Loop over categories
* Build terms for nlcom calculations		
		forvalues index = 1/`listnum' {
			local thispopval : word `index' of `normpopvalues'
			local cat : word `index' of `surveycats'
* Accumulate terms for individual A's			
			local nlterms `nlterms' (ln((exp([`cat']_cons) / ( `denomterm' ) )  / (1 - (exp([`cat']_cons) / ( `denomterm' )) ) * ((1 - `thispopval') / `thispopval'))* `multiplier')

* Accumlate terms for B			
			local bterm `bterm' + abs((ln((exp([`cat']_cons) / ( `denomterm' ) )  / (1 - (exp([`cat']_cons) / ( `denomterm' )) ) * ((1 - `thispopval') / `thispopval'))*`multiplier'))

* Accumlate terms for B_w
			local bwterm `bwterm' + (abs((ln((exp([`cat']_cons) / ( `denomterm' ) )  / (1 - (exp([`cat']_cons) / ( `denomterm' )) ) * ((1 - `thispopval') / `thispopval')))) * `thispopval' * `multiplier')
			}


		local bterm ((`bterm') / `nunique')
		local bwterm ((`bwterm'))

		qui nlcom `nlterms' `bterm' `bwterm', post

* Relabel matrices
* Collect labels/numbers in macro
		foreach s of local surveycats {
			local valueorlabel : label (`varlist') `s'
			local surveycatlabels `surveycatlabels' A':`valueorlabel'
			}
		local surveycatlabels `surveycatlabels' B:B
		local surveycatlabels `surveycatlabels' B:B_w
	
* Coefficients
		matrix aprimes = e(b) / `multiplier'
		matrix rownames aprimes=A'
		matrix colnames aprimes=`surveycatlabels'

* Covariances
		matrix covs = e(V) / (`multiplier' * `multiplier')
		matrix rownames covs=`surveycatlabels'
		matrix colnames covs=`surveycatlabels'


* Is there a significant difference between the distribution in the sample and the known distribution in the population?
* Run a chi-square test
		tempname observed
		tempname chisqp
		tempname chisqlr
		tempname df
		tempname pp
		tempname plr
		qui tab `varlist',matcell(`observed')
		matrix `observed' = `observed''
		mata: calcchisquare()
		scalar `df' = e(rank)
		scalar `pp' = chi2tail(`df',`chisqp')
		scalar `plr' = chi2tail(`df',`chisqlr')


				

		
* Prepare for posting
	

		ereturn post aprimes covs,depname(`varlist') esample(`touse') obs(`cases')

* Pearson Chi-2
		ereturn scalar chi2p =  `chisqp'
		ereturn scalar df = `df'
		ereturn scalar pp = `pp'
* LR Chi-2
		ereturn scalar chi2lr =  `chisqlr'
		ereturn scalar plr = `plr'
		
		ereturn local cmdline `commandline'
		ereturn local cmd "surveybias"
		}
	else { //replay
		syntax [, Level(cilevel)]
		}
	ereturn display, level(`level')
	display as text " "
	display as text _col(5) "Ho: no bias"

	display as text _continue _col(5) "Degrees of freedom: "
	display as result  _col(25) e(df)

	display as text _continue _col(5) "Chi-square (Pearson) = "
	display as result  _col(26) e(chi2p)
	display as text _continue _col(5) "Pr (Pearson) = "
	display as result _col(18) e(pp)

	display as text _continue _col(5) "Chi-square (LR) = "
	display as result   _col(20) e(chi2lr)
	display as text _continue _col(5) "Pr (LR) = "
	display as result _col(13) e(plr)

* Small sample size?

	if `cases' < 100 {
		display _n "Warning: The effective sample size is very small (n=`cases')."
		display "Chi square values may be unrealiable."
		display "Consider using mgof (ssc describe mgof) for exact tests."
		}

end
mata:
	void calcchisquare()
	{
		myvalidcases = strtoreal(tokens(st_local("cases")))
		expected = strtoreal(tokens(st_local("normpopvalues")))*myvalidcases
		tempstring = st_local("observed")
		/* get observed values */
		observed = st_matrix(tempstring)
		/* calculate Pearson chisquare and LR chisquare */
		chisqp = sum(((observed :- expected):^2) :/ expected)
		chisqlr = 2*sum(ln(observed :/ expected):*observed)
		/* return chisq in temp macro */
		chitempstringp = st_local("chisqp")
		chitempstringlr = st_local("chisqlr")
		st_numscalar(chitempstringp,chisqp)
		st_numscalar(chitempstringlr,chisqlr)
		}
	
end

/* surveybias.ado ends here */
